
clear all % start from an empty workspace

% ********************************************************************
% load and determine data 
% ********************************************************************
% The data file must contain:
%   'data'     - matrix with the phenotype in the first column and the
%                other factors in the remaining columns
%   'remember' - names of the phenotype and the factors, in the same
%                order as the columns of 'data'
load('pretend_genair_data')
y=data(:,1); % the phenotype (0:absence, 1:presence)
% In the data set, the levels of each factor must be denoted as 0,1,2,3,...
x=[data(:,2:8) data(:,9:10)]; % the risk factors
nuoflevels=[2 4 3 4 3 3 2 2 3]; % number of levels for each risk factor
indicateordinal=[0 1 1 1 1 1 0 0 1]; % determines if a risk factor is categorical (0) or ordinal (1)
vectorofx={remember{2:8} remember{9:10}}; % names of risk factors

% The three per-factor vectors above are maintained by hand and must have
% one entry per column of x. Check this here so that a mismatch is reported
% immediately rather than somewhere inside the sampling routine.
nuoffactors=size(x,2);
assert(numel(nuoflevels)==nuoffactors,...
    'nuoflevels has %d entries but x has %d risk factors.',numel(nuoflevels),nuoffactors);
assert(numel(indicateordinal)==nuoffactors,...
    'indicateordinal has %d entries but x has %d risk factors.',numel(indicateordinal),nuoffactors);
assert(numel(vectorofx)==nuoffactors,...
    'vectorofx has %d names but x has %d risk factors.',numel(vectorofx),nuoffactors);

% 2 random confounders, one row per subject: column 1 is continuous
% (10*rand), column 2 is binary (binornd with n=1, p=0.5)
wconf=[10*rand(size(x,1),1) binornd(1,0.5,size(x,1),1)];

% ************************************************************************
% displays average frequencies for y and the x's; the returned averagey
% and averagex are passed to reproduceparam6logistic in the
% post-processing section
% ************************************************************************
[averagey,averagex]=genaverage(x,y,nuoflevels,vectorofx);
% ************************************************************************

% ***********************************************************************
% determine MCMC related parameters
% ***********************************************************************
iterations=1000;      % number of saved iterations
thin=4;               % thinning: iterations run for every one that is saved
burnin=1000;          % burn-in number of iterations (no thinning)
maxnumofclusters=15;  % maximum number of clusters
% The total number of post-burn-in iterations is described as
% iterations*thin.
% NOTE(review): the earlier wording called thin the number of *additional*
% iterations per saved one, which would give iterations*(thin+1) instead;
% confirm against mainnointwithordinalarand.

% ***********************************************************************                         
% calling main MCMC sampling routine
% ***********************************************************************
[clustermatrixsave,clusterallocselectind,riskthetasave,clusterallocsave,...
    psicsave,phisave,switchessave,rhosave,alphaforpsisave,betaconfsave]=...
    mainnointwithordinalarand(y,x,wconf,nuoflevels,...
    burnin,iterations,thin,maxnumofclusters,indicateordinal);

% ************************************************************************
% obtaining a sample for the risk of a new profile
% ************************************************************************
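% NOTE(review): this example lists 7 factor values, but x in this script has
% 9 risk factors; presumably xnew needs one entry per risk factor, so extend
% it before uncommenting (confirm against postprocessnewprofile).
% x1=1;x2=1;x3=1;x4=1;x5=1; x6=1; x7=1;
% xnew=[x1 x2 x3 x4 x5 x6 x7];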
%[risknewprofile]=postprocessnewprofile(xnew,nuoflevels,psicsave,phisave,riskthetasave,iterations,maxnumofclusters);
% % PLOT THE RISK FOR THE NEW PROFILE **************************************
% figure(5);
% hist(risknewprofile(:,1));
% title('risk for new profile');
% disp(['Average risk for ' num2str(xnew) 'profile =' num2str(mean(risknewprofile))]); 

% ************************************************************************
% obtaining a sample for the difference in the risk of 2 new profiles
% ************************************************************************
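% NOTE(review): these example profiles have 7 entries, but x in this script
% has 9 risk factors; presumably each profile needs one entry per risk
% factor (confirm against postprocessdiffoftwoprofiles).
% xnew1=[1 1 1 0 0 0 1]; xnew2=[1 1 1 0 0 0 0];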
%[risknewprofilediff]=postprocessdiffoftwoprofiles(xnew1,xnew2,nuoflevels,psicsave,phisave,riskthetasave,iterations,maxnumofclusters);

% ************************************************************************
% PRODUCE THE DENDROGRAM AND CHOOSE A HIERARCHICAL CLUSTERING 
% ************************************************************************
% Number of clusters requested from producetree (pick it after looking at
% the dendrogram). Note that the PAM section further down assigns
% choosenuofclusters again.
choosenuofclusters=3;
chosentreeclustering=producetree(clustermatrixsave,choosenuofclusters);
% ************************************************************************

% ************************************************************************
% PRODUCE the PAM clustering
% ************************************************************************
% pam is given the dissimilarities as a row vector holding the strictly
% lower triangle of the matrix read row by row: (2,1),(3,1),(3,2),(4,1),...
% After transposing, those entries sit in the strictly upper triangle in
% column-major order, so a single logical-mask index extracts them in the
% required order without growing the vector inside a loop.
% Assumes clustermatrixsave is a square similarity matrix with entries
% in [0,1], so that 1-similarity is a dissimilarity.
nuofsubjects=size(clustermatrixsave,1);
simtransposed=clustermatrixsave.';
dissmatrixvector=1-simtransposed(triu(true(nuofsubjects),1)).';

% Try 2..maxpamclusters clusters and keep the clustering with the largest
% average silhouette width (pamclustering.ttsyl).
% Increase the 15 if the data support a larger number of clusters; the cap
% at nuofsubjects-1 avoids asking for as many clusters as there are subjects.
maxpamclusters=min(15,nuofsubjects-1);
% Start from -Inf rather than 0: silhouette widths can be zero or negative,
% and the best candidate must still be selected in that case.
averageslhouettewidth=-Inf;
chosenpamclustering=[]; % also discards any stale value from an earlier run
for ipam=2:maxpamclusters
    pamclustering=pam(dissmatrixvector,ipam); % no more than two arguments for diss matrix
    if pamclustering.ttsyl>averageslhouettewidth
        choosenuofclusters=ipam;
        chosenpamclustering=pamclustering.ncluv';
        averageslhouettewidth=pamclustering.ttsyl;
        fprintf('PAM: %d clusters, average silhouette width %g\n',...
            choosenuofclusters,averageslhouettewidth);
    end
end
if isempty(chosenpamclustering)
    error('No PAM clustering was selected (%d subjects, candidates 2..%d).',...
        nuofsubjects,maxpamclusters);
end

% ************************************************************************
% POST-PROCESSING
% Reproduces the psi, phi and theta parameters for the chosen clustering.
% ************************************************************************
% Leave exactly one of the following three assignments uncommented:
%chosenclustering=clusterallocsave(clusterallocselectind,:); % THE LEAST SQUARES CLUSTERING
%chosenclustering=chosentreeclustering'; % THE HIERARCHICAL CLUSTERING
chosenclustering=chosenpamclustering'; % THE PAM CLUSTERING
[psicfinal,psicdiff,phifinal,phidiff,thetafinal]=...
    reproduceparam6logistic(nuoflevels,vectorofx,...
    averagey,averagex,...
    chosenclustering,clusterallocsave,...
    riskthetasave,psicsave,phisave,...
    iterations,maxnumofclusters);
